library(tidyverse)
library(ggplot2)
library(tidyverse)
library(kableExtra)
library(reshape2)
library(dplyr)
library(network)
library(ggraph)
library(GGally)
library(sna)
library(ggplot2)
library(ggnetwork)
library(ggraph)
library(tidygraph)
library(ggsci)
library(data.table)
library(reshape2)
library(tidymodels)
library(furrr)
library(R.matlab)
library(BiocParallel)
library(ComplexHeatmap)
library(circlize)
library(RColorBrewer)
library(gtools)
library(graphlayouts)
net_dir = "/pastel/projects/speakeasy_dlpfc/SpeakEasy_singlenuclei/2nd_pass/snakemake-sn/results/ast/"
expression_dir = "/pastel/projects/speakeasy_dlpfc/SpeakEasy_singlenuclei/2nd_pass/snakemake-sn/input/"
resources_dir = params$resources_dir
# BN folders
data_prefix = "ast_m19"
bn_results_dir = "/pastel/projects/speakeasy_dlpfc/BN/sn_dlpfc_ast_res/ast_modules/ast_m19/"
BN_run_dir = "/pastel/resources/bayesian_networks/CINDERellA/"
BN_output_dir = paste0(bn_results_dir,data_prefix,"_res")
BN_run_data = paste0(bn_results_dir,data_prefix,"_exp.txt")
cat("Preparing phenotypes...")
## Preparing phenotypes...
### phenotype
phenotype = readRDS(paste0(resources_dir, "phenotypes/basic_Apr2022.rds"))
phenotype_dt = phenotype$data
metadata = phenotype$data
### Create custom phenotypes and includes resilience
phenotype_dt$ad_dementia_status = ifelse(phenotype_dt$cogdx_3gp<3,0,1)
phenotype_dt$cognitive_impairment_status = ifelse(phenotype_dt$cogdx_3gp<2,0,1)
phenotype_dt$gpath_sqrt = sqrt(phenotype_dt$gpath)
phenotype_dt$amyloid_sqrt = sqrt(phenotype_dt$amyloid)
phenotype_dt$tangles_sqrt = sqrt(phenotype_dt$tangles)
phenotype_dt$nft_sqrt = sqrt(phenotype_dt$nft)
phenotype_dt$plaq_d_sqrt = sqrt(phenotype_dt$plaq_d)
phenotype_dt$plaq_n_sqrt = sqrt(phenotype_dt$plaq_n)
phenotype_dt$tdp_43_binary = phenotype_dt$tdp_st2
phenotype_dt$dxpark_status = phenotype_dt$dxpark-1
resilience = read.csv(paste0(resources_dir, "resilience/resilience_capuano_july2022/R719_CR_ROSMAP.csv"))
resilience$projid2 = sprintf("%08d", resilience$projid) # Add leading zeros
phenotype_dt = phenotype_dt %>% dplyr::left_join(resilience[,-1], by = c("projid"="projid2"))
# save(phenotype_dt, file = paste0(bn_results_dir,"phenotypes.RData"))
cat("Saved:", paste0(bn_results_dir,"phenotypes.RData"))
## Saved: /pastel/projects/speakeasy_dlpfc/BN/sn_dlpfc_ast_res/ast_modules/ast_m19/phenotypes.RData
Expression data
cat("Loading SE modules...")
## Loading SE modules...
mod2plot = 19 # Module we want to plot
# pval_cutoff = 0.05 # p-value cutoff for the BN
top_n_genes = 100 # Number of top genes to include in the BN
# Expression data for a single set:
exprData = read.table(paste0(expression_dir, "ast.txt"), header = T, stringsAsFactors = F, check.names = F)
expr_matx = as.data.frame(exprData) # Residuals of the expression
gene_modules = read.table(paste0(net_dir, "geneBycluster.txt"), header = T, stringsAsFactors = F) # clusters from SpeakEasy
# Select the expression values for the module of interest
to_plot = gene_modules$ensembl[gene_modules$cluster_lv3 == mod2plot]
expr_matx_mod = expr_matx[to_plot, ]
# save(expr_matx_mod, file = paste0(bn_results_dir,"dataExp.RData"))
cat("Selecting modules based on association results...")
## Selecting modules based on association results...
load(paste0(bn_results_dir,"phenotypes.RData"))
load(paste0(bn_results_dir,"dataExp.RData"))
phenotype_match = phenotype_dt[match(colnames(expr_matx_mod),phenotype_dt$projid),]
expr_matx_mod_t <- as.matrix(t(expr_matx_mod))
#identical(rownames(data4linear_reg),phenotype_match$projid) # TRUE
#Covariates
covariates_ = c( "cogng_demog_slope",
"tangles_sqrt",
"amyloid_sqrt")
adj_by = c("msex","age_death")
data4linear_reg = cbind(expr_matx_mod_t,phenotype_match[,covariates_,drop=F],phenotype_match[,adj_by,drop=F])
scale_data = TRUE
matrix_pvalue = matrix(NA, nrow = length(covariates_), ncol = ncol(expr_matx_mod_t))
for (x in 1:length(covariates_)){
for (y in 1:ncol(expr_matx_mod_t)){
x_cov = covariates_[x]
y_mod = colnames(data4linear_reg)[y]
if(scale_data){
form = as.formula(paste(paste0("scale(",y_mod,")"),"~",paste0("scale(",x_cov,")"),"+",paste(adj_by,collapse = "+")))
}else{
form = as.formula(paste(y_mod,"~",x_cov,"+",paste(adj_by,collapse = "+")))
}
matrix_pvalue[x,y] <- glance(summary( lm(form, data = data4linear_reg) ))$p.value
}
}
rownames(matrix_pvalue) = covariates_
colnames(matrix_pvalue) = colnames(expr_matx_mod_t)
matrix_pvalue_adj = matrix(p.adjust(as.vector(as.matrix(matrix_pvalue)), method='bonferroni'),ncol=ncol(matrix_pvalue))
# signifMods = colnames(matrix_pvalue)[colSums(matrix_pvalue_adj<=pval_cutoff)>0]
matrix_pvalue_m = reshape2::melt(as.matrix(matrix_pvalue)) %>%
group_by(Var2) %>% summarise(pval = min(value)) %>% arrange(pval)
top_genes = as.character(matrix_pvalue_m$Var2[1:top_n_genes])
# save(signifMods, file = paste0(bn_results_dir,"selected_dataExp.RData"))
cat("Saved:", paste0(bn_results_dir,"selected_dataExp.RData"))
## Saved: /pastel/projects/speakeasy_dlpfc/BN/sn_dlpfc_ast_res/ast_modules/ast_m19/selected_dataExp.RData
# Save the input expression for the BN
# write_csv(as.data.frame(expr_matx_mod)[top_genes,], file = BN_run_data, col_names = F)
cat("Saved:", BN_run_data)
## Saved: /pastel/projects/speakeasy_dlpfc/BN/sn_dlpfc_ast_res/ast_modules/ast_m19/ast_m19_exp.txt
# CINDERellA has two inputs: exp.txt and output_folder
setwd(BN_run_dir)
cmd_matlab_call = paste0("matlab -nodisplay -nojvm -nosplash -nodesktop -r")
cmd_matlab_param = paste0("runt=1000; data='",BN_run_data,"'; out_dir='",BN_output_dir,"'; run('",BN_run_dir,"CINDERellA.m')")
cmd_matlab_run = paste0(cmd_matlab_call, " \"",cmd_matlab_param,"\"")
cat(cmd_matlab_run)
system(cmd_matlab_run)
## [1] "/pastel/projects/speakeasy_dlpfc/BN/sn_dlpfc_ast_res/ast_modules/ast_m19/ast_m19_res"
Read results
Edges filtered
# BN_output_dir = paste0(bn_results_dir,data_prefix,"_res")
# BN_run_data = paste0(bn_results_dir,data_prefix,"_exp.txt")
# load(file = paste0(bn_results_dir,data_prefix,"_BN_input.RData")) # phenotype_match,gene_net_metadata,mod_eigengen
# load(file = paste0(bn_results_dir,data_prefix,"_dataExp.RData"))
edgefrq = read_tsv(paste0(BN_output_dir,"/edgefrq.txt"), col_names = c("A","B","freq"), show_col_types = F)
dataExp = expr_matx_mod
edges_df = na.omit(cbind(edgefrq, rownames(dataExp)[edgefrq$A], rownames(dataExp)[edgefrq$B]))
colnames(edges_df) = c("fromNode", "toNode", "weight", "fromAltName", "toAltName") # match names for Cytoscape input
edges_df$fromAltName = gsub("(.*)\\.(.*)","\\1",edges_df$fromAltName)
edges_df$toAltName = gsub("(.*)\\.(.*)","\\1",edges_df$toAltName)
edges_filtered = edges_df[abs(edges_df$weight)>0.3, ] # weight default = 0.33
rownames(edges_filtered) = NULL
# createDT(edges_filtered %>% arrange(-weight))
Nodes filtered
nodes_table = data.frame(nodeName = 1:nrow(dataExp), altName = gsub("(.*)\\.(.*)","\\1",rownames(dataExp))) %>% distinct()
nodes_table$altName = gsub("(.*)\\.(.*)","\\1",nodes_table$altName)
rownames(nodes_table) = NULL
nodes_table = na.omit(unique(nodes_table)) %>% left_join(unique(gene_modules[,c("ensembl","gene_name")]), by = c("altName"="ensembl"))
nodes_filtered = nodes_table[nodes_table$altName %in% unique(c(edges_filtered$fromAltName,edges_filtered$toAltName)), ]
nodes_filtered = nodes_filtered[! duplicated(nodes_filtered$altName), ]
createDT(nodes_filtered %>% left_join(reshape2::melt(as.matrix(matrix_pvalue)), by = c("altName"="Var2")) %>%
group_by(altName) %>% mutate(best_pval = min(value),
best_pheno = Var1[which.min(value)],
pvalues = paste0(formatC(value, format = "e", digits = 2), collapse = ";"),
phenotypes = paste0(Var1, collapse = ";")) %>%
select(nodeName, altName, gene_name, best_pval, best_pheno, pvalues, phenotypes) %>%
distinct() %>% arrange(best_pval))
BN plot
# Cairo::CairoPDF(file = "/pastel/Github_scripts/SpeakEasy_dlpfc/figures4paper/v2_mar2024/bn_ast19.pdf", width = 16, height = 10)
plot_geneBN(edges_filtered = edges_filtered,
nodes_filtered = nodes_filtered)
## Using `stress` as default layout

Session
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Stream 8
##
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/libopenblasp-r0.3.15.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] ggforce_0.3.3 igraph_1.5.1 graphlayouts_0.8.0
## [4] gtools_3.9.4 RColorBrewer_1.1-3 circlize_0.4.14
## [7] ComplexHeatmap_2.11.1 BiocParallel_1.28.3 R.matlab_3.6.2
## [10] furrr_0.3.1 future_1.33.0 yardstick_1.2.0
## [13] workflowsets_1.0.1 workflows_1.1.3 tune_1.1.2
## [16] rsample_1.2.0 recipes_1.0.8 parsnip_1.1.1
## [19] modeldata_1.2.0 infer_1.0.5 dials_1.2.0
## [22] scales_1.2.1 broom_1.0.5 tidymodels_1.1.1
## [25] data.table_1.14.8 ggsci_2.9 tidygraph_1.2.0
## [28] ggnetwork_0.5.10 sna_2.6 statnet.common_4.5.0
## [31] GGally_2.1.2 ggraph_2.0.5 network_1.17.1
## [34] reshape2_1.4.4 kableExtra_1.3.4 forcats_0.5.1
## [37] stringr_1.5.0 dplyr_1.1.3 purrr_1.0.2
## [40] readr_2.1.4 tidyr_1.3.0 tibble_3.2.1
## [43] ggplot2_3.4.4 tidyverse_1.3.1
##
## loaded via a namespace (and not attached):
## [1] shadowtext_0.1.1 readxl_1.3.1 backports_1.4.1
## [4] systemfonts_1.0.4 plyr_1.8.9 splines_4.1.2
## [7] crosstalk_1.2.0 listenv_0.9.0 digest_0.6.33
## [10] foreach_1.5.2 htmltools_0.5.7 viridis_0.6.2
## [13] fansi_1.0.5 magrittr_2.0.3 cluster_2.1.2
## [16] doParallel_1.0.17 tzdb_0.4.0 globals_0.16.2
## [19] modelr_0.1.8 gower_1.0.1 matrixStats_1.0.0
## [22] vroom_1.6.4 R.utils_2.12.2 svglite_2.1.0
## [25] hardhat_1.3.0 colorspace_2.1-0 rvest_1.0.2
## [28] ggrepel_0.9.4 haven_2.4.3 xfun_0.41
## [31] crayon_1.5.2 jsonlite_1.8.7 survival_3.2-13
## [34] iterators_1.0.14 glue_1.6.2 polyclip_1.10-6
## [37] gtable_0.3.4 ipred_0.9-14 webshot_0.5.2
## [40] GetoptLong_1.0.5 shape_1.4.6 future.apply_1.11.0
## [43] BiocGenerics_0.40.0 DBI_1.1.3 Rcpp_1.0.11
## [46] viridisLite_0.4.2 clue_0.3-60 bit_4.0.5
## [49] GPfit_1.0-8 DT_0.30 stats4_4.1.2
## [52] lava_1.7.2.1 prodlim_2023.08.28 htmlwidgets_1.6.2
## [55] httr_1.4.7 ellipsis_0.3.2 pkgconfig_2.0.3
## [58] reshape_0.8.9 R.methodsS3_1.8.2 farver_2.1.1
## [61] nnet_7.3-16 sass_0.4.7 dbplyr_2.4.0
## [64] utf8_1.2.4 labeling_0.4.3 tidyselect_1.2.0
## [67] rlang_1.1.2 DiceDesign_1.9 munsell_0.5.0
## [70] cellranger_1.1.0 tools_4.1.2 cachem_1.0.8
## [73] cli_3.6.1 generics_0.1.3 evaluate_0.23
## [76] fastmap_1.1.1 yaml_2.3.7 bit64_4.0.5
## [79] knitr_1.45 fs_1.6.3 R.oo_1.25.0
## [82] xml2_1.3.5 compiler_4.1.2 rstudioapi_0.15.0
## [85] png_0.1-8 reprex_2.0.1 lhs_1.1.6
## [88] tweenr_1.0.2 bslib_0.5.1 stringi_1.7.12
## [91] highr_0.10 lattice_0.20-45 Matrix_1.6-1.1
## [94] vctrs_0.6.4 pillar_1.9.0 lifecycle_1.0.3
## [97] GlobalOptions_0.1.2 jquerylib_0.1.4 R6_2.5.1
## [100] gridExtra_2.3 IRanges_2.28.0 parallelly_1.36.0
## [103] codetools_0.2-18 MASS_7.3-54 rjson_0.2.21
## [106] withr_2.5.2 S4Vectors_0.32.4 parallel_4.1.2
## [109] hms_1.1.3 rpart_4.1-15 timeDate_4022.108
## [112] coda_0.19-4 class_7.3-19 rmarkdown_2.25
## [115] lubridate_1.8.0